%load_ext pretty_jupyter
import patsy as pa
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
tfd = tfp.distributions
from MakeMyPrior.elicitation_wrapper import expert_model
from MakeMyPrior.training import trainer
from MakeMyPrior.helper_functions import group_obs, Exponential_unconstrained, Normal_unconstrained
from MakeMyPrior.user_config import target_config, target_input
from MakeMyPrior.helper_functions import print_restab, plot_priors_hyp, plot_expert_preds, group_stats, plot_priors_flow
import MakeMyPrior.combine_losses
from tabulate import tabulate
import pprint
pp = pprint.PrettyPrinter(depth=4)
tf.random.set_seed(2024)
Background: Case Study¶
We focus on an experiment conducted by Unkelbach & Rom (2020) [REF], in which participants are presented with general knowledge statements in two consecutive phases. During the second phase, they are required to indicate whether each statement is true or false. The response variable of interest is the proportion of true judgments (PTJs). The main objective of the study is to investigate the influence of two factors on PTJs: (1) repetition (ReP), which involves presenting some statements from the first phase again in the second phase, and (2) encoding depth (EnC), where participants are divided into groups varying in the level of elaboration required for initial sentence processing in the first phase. We consider a 2 (ReP: repeated, new) $\times$ 3 (EnC: shallow, standard, deep) between-subject factorial design with treatment contrasts used for both factors. The baseline levels are set to new for ReP and deep for EnC.
Data generating model¶
\begin{align*} y_i &\sim \text{Normal}(\theta_i, s)\\ \theta_i &= \beta_0 + \beta_1x_1 + \beta_2x_2 + \beta_3x_3 + \beta_4x_4 + \beta_5x_5\\ \beta_k &\sim \text{Normal}(\mu_k, \sigma_k) \quad \text{for }k=0,\ldots, 5\\ s &\sim \text{Exponential}(\nu) \end{align*}

Here, $y_i$ represents the response for each observation $i = 1, \ldots, N$, assumed to follow a normal distribution with mean $\theta_i$ and standard deviation $s$. The expected value $\theta_i$ is modeled as a linear function of the two categorical predictors, ReP and EnC. The regression coefficients $\beta_k$ for $k = 0, \ldots, 5$ are assigned normal prior distributions. Specifically, $\beta_0$ represents the PTJ for the baseline condition new-deep, while $\beta_1$ represents the difference in PTJs ($\Delta$PTJ) between repeated and new statements. Additionally, $\beta_2$ captures the $\Delta$PTJ for new statements in the standard- vs. deep-encoding condition, $\beta_3$ represents the $\Delta$PTJ for new statements in the shallow- vs. deep-encoding condition, and $\beta_4$ and $\beta_5$ account for the interaction effects between ReP and EnC. The standard deviation $s$ of the normal likelihood follows an Exponential prior distribution with rate parameter $\nu$.
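Read as a generative recipe (and using the modules imported above), one draw from this model can be sketched as follows; the concrete numbers are placeholders for illustration only, not elicited values.
# one draw from the data generating model (illustrative placeholder hyperparameters)
mu_k = tf.constant([0.2, 0.1, 0.0, 0.0, 0.0, 0.0])   # placeholder prior means
sigma_k = tf.constant([0.1] * 6)                      # placeholder prior sds
nu = 10.                                              # placeholder Exponential rate
beta = tfd.Normal(mu_k, sigma_k).sample()             # beta_0, ..., beta_5
s = tfd.Exponential(nu).sample()                      # residual standard deviation
x = tf.constant([1., 1., 0., 0., 0., 0.])             # one row of the design matrix
theta = tf.reduce_sum(beta * x)                       # linear predictor for this row
y = tfd.Normal(theta, s).sample()                     # one simulated observation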
Methodology: Workflow¶
For each mini-batch:
- $i$ observations with $i = 1, \ldots, N$,
- $k$ regression coefficients with $k = 0,\ldots,5$,
- $s$ samples from each prior distribution with $s=1,\ldots, M$,
- $g$ group means with $g = 1,\ldots,6$ (corresponding to a $2 \times 3$ factorial design)
General procedure (see the toy sketch below):
- Draw samples from prior distribution(s) of model parameters
- Generate prior predictions according to the data generating model
- Compute the pre-defined target quantities
- Compute the elicited statistics of the target quantities
- Measure the discrepancy between the model-implied and the expert elicited statistics
- Update the weights of the transformation function of the normalizing flow
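To make these steps concrete, here is a deliberately simplified, self-contained toy example (not the MakeMyPrior API): it learns the hyperparameters of a single Normal prior in an intercept-only model by matching quantile-based statistics of the prior predictive distribution. All names and values are illustrative only.
# toy illustration of the general procedure (placeholder values, not the package API)
probs = [10., 20., 30., 40., 50., 60., 70., 80., 90.]
# "expert" statistics, here simulated from an assumed true prior Normal(0.12, 0.02)
y_expert = tfd.Normal(tfd.Normal(0.12, 0.02).sample(500), 0.1).sample()
expert_quants = tfp.stats.percentile(y_expert, probs)
mu = tf.Variable(0.)                                # trainable hyperparameters
log_sigma = tf.Variable(tf.math.log(0.5))
optimizer = tf.keras.optimizers.Adam(0.05)
for epoch in range(300):
    with tf.GradientTape() as tape:
        beta0 = tfd.Normal(mu, tf.exp(log_sigma)).sample(500)     # 1. prior samples
        ypred = tfd.Normal(beta0, 0.1).sample()                   # 2. prior predictions
        model_quants = tfp.stats.percentile(ypred, probs)         # 3./4. elicited statistics
        loss = tf.reduce_mean((model_quants - expert_quants)**2)  # 5. discrepancy
    grads = tape.gradient(loss, [mu, log_sigma])                  # 6. gradient update
    optimizer.apply_gradients(zip(grads, [mu, log_sigma]))
print(mu.numpy(), tf.exp(log_sigma).numpy())   # mu should approach 0.12 (sigma is only weakly identified in this toy)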
Method: Normalizing Flow¶
Method specification¶
- PriorSamples($\lambda$): \begin{align*} (\{\beta_k\}_s,\{s\}_s) &\sim p_\phi(\lambda)\\ \end{align*}
- Generator($\beta_k, s$): \begin{align*} \{\theta_i\}_s &= \{\beta_k\}_s \times X_i\\ \bar{s} &= \frac{1}{M} \sum_{s=1}^M \{s\}_s\\ \{y_i\}_s &\sim \text{Normal}(\{\theta_i\}_s, \bar{s})\\ \end{align*}
- Targets($\{y_i\}_s$): \begin{align*} \{\bar{y}_{g}\}_s &= \frac{1}{N_{g}} \sum_{i=1}^{N_{g}} \{y_i\}_s \quad \text{ for } g = 1,\ldots,6\\ \{\Delta_{eff}\}_s &= \{\bar{y}_{g_1}\}_s - \{\bar{y}_{g_2}\}_s \quad \text{ for } g_1 = 4,5,6 \text{ and } g_2 = 1,2,3\\ \{R^2\}_s &= \frac{Var(\{\theta_i\}_s)}{Var(\{y_i\}_s)} \end{align*}
- Elicits($\{\bar{y}_{g}\}_s, \{\Delta_{eff}\}_s, \{R^2\}_s$): \begin{align*} \text{Quantile-based:} \quad &Q_p^{g} = Q_{p}(\{\bar{y}_{g}\}_s) \quad \text{ for } g = 1,\ldots, 6;\ p = 0.1, \ldots, 0.9\\ \text{Quantile-based:} \quad &Q_p^{eff} = Q_{p}(\{\Delta_{eff}\}_s) \quad \text{ for } eff = 1,2,3;\ p = 0.1, \ldots, 0.9\\ \text{Histogram-based:} \quad &\{R^2\}_s = \{R^2\}_s \end{align*}
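As a small illustration of the quantile-based elicitation step, the nine quantiles can be computed across the sample dimension with tfp.stats.percentile. The tensor shape (B, M, 6) for the group means is an assumption made for this sketch, not the package's internal layout.
# sketch of quantile-based elicited statistics for grouped targets
# (assumption: group means stored in a tensor of shape (B, M, 6), samples along axis 1)
group_means = tfd.Normal([0.12, 0.10, 0.09, 0.27, 0.23, 0.20], 0.05).sample((2, 200))
quants = tfp.stats.percentile(group_means,
                              q=[10., 20., 30., 40., 50., 60., 70., 80., 90.],
                              axis=1)               # shape (9, 2, 6)
elicited = tf.transpose(quants, perm=[1, 0, 2])     # shape (2, 9, 6): per batch, 9 quantiles per group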
User specification¶
Setting hyperparameter for the learning algorithm¶
# -.-|m { input: true, output_error: false, input_fold: show }
# setting of hyperparameter of learning algorithm
user_config = dict(
B = 2**8,
rep = 200,
epochs = 300,
view_ep = 30,
lr_decay = True,
lr0 = 0.001,
lr_min = 0.000001,
loss_dimensions = "m,n:B",
loss_discrepancy = "energy",
loss_scaling = "unscaled",
method = "normalizing_flow"
)
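The setting loss_discrepancy = "energy" refers to an energy-distance-type discrepancy between model-implied and expert-elicited statistics. The sketch below shows the textbook (squared) energy distance between two univariate samples; the package's internal implementation may differ in details such as scaling or batching.
# minimal sketch of an energy-distance discrepancy between two samples x and y
# (assumption: conceptually mirrors loss_discrepancy = "energy"; not the package code)
def energy_distance(x, y):
    d_xy = tf.abs(x[:, None] - y[None, :])   # pairwise distances between samples
    d_xx = tf.abs(x[:, None] - x[None, :])
    d_yy = tf.abs(y[:, None] - y[None, :])
    return (2.0 * tf.reduce_mean(d_xy)
            - tf.reduce_mean(d_xx)
            - tf.reduce_mean(d_yy))
x = tfd.Normal(0., 1.).sample(200)
y = tfd.Normal(0.5, 1.).sample(200)
print(energy_distance(x, y))   # larger values indicate a larger mismatch between the samples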
Define the design matrix¶
# construct design matrix
X = pa.dmatrix("a*b", pa.balanced(a = 2, b = 3, repeat = 60), return_type="dataframe")
dmatrix = tf.cast(X, dtype = tf.float32)
# extract contrast matrix from design matrix (for allocating observations to groups)
cmatrix = dmatrix[0:dmatrix.shape[1], :]
print(X)
     Intercept  a[T.a2]  b[T.b2]  b[T.b3]  a[T.a2]:b[T.b2]  a[T.a2]:b[T.b3]
0          1.0      0.0      0.0      0.0              0.0              0.0
1          1.0      0.0      1.0      0.0              0.0              0.0
2          1.0      0.0      0.0      1.0              0.0              0.0
3          1.0      1.0      0.0      0.0              0.0              0.0
4          1.0      1.0      1.0      0.0              1.0              0.0
..         ...      ...      ...      ...              ...              ...
355        1.0      0.0      1.0      0.0              0.0              0.0
356        1.0      0.0      0.0      1.0              0.0              0.0
357        1.0      1.0      0.0      0.0              0.0              0.0
358        1.0      1.0      1.0      0.0              1.0              0.0
359        1.0      1.0      0.0      1.0              0.0              1.0

[360 rows x 6 columns]
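Because cmatrix is simply the first six rows of the design matrix, it serves as a contrast matrix only if those rows already cover all six design cells, which holds for this balanced layout. A quick sanity check (an optional addition, not part of the original workflow):
# check that the first 6 rows of the design matrix enumerate all unique design cells
rows_in_cmatrix = {tuple(r) for r in cmatrix.numpy()}
rows_in_dmatrix = {tuple(r) for r in dmatrix.numpy()}
print(len(rows_in_cmatrix), rows_in_cmatrix == rows_in_dmatrix)   # expected: 6 True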
Setup the 'ideal agent'¶
Define an (arbitrary) ground truth for the hyperparameter values, which is used to validate the method.
# true hyperparameter values for ideal_expert
true_values = dict()
true_values["mu"] = [0.12, 0.15, -0.02, -0.03, -0.02, -0.04]
true_values["sigma"] = [0.02, 0.02, 0.06, 0.06, 0.03, 0.03]
true_values["nu"] = 9.
# model parameters
parameters_dict = dict()
for i in range(6):
    parameters_dict[f"beta_{i}"] = {
        "true": tfd.Normal(true_values["mu"][i], true_values["sigma"][i]),
        "initialization": [tfd.Normal(0.,0.1)]*2
    }
parameters_dict["sigma"] = {
    "true": tfd.Exponential(true_values["nu"]),
    "initialization": [tfd.Normal(0.,0.1)]
}
print("True hyperparameter values:")
pp.pprint(true_values)
True hyperparameter values:
{'mu': [0.12, 0.15, -0.02, -0.03, -0.02, -0.04],
'nu': 9.0,
'sigma': [0.02, 0.02, 0.06, 0.06, 0.03, 0.03]}
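As a consistency check on the chosen ground truth, the group means implied by the true coefficient means can be computed directly from the contrast matrix; up to sampling noise, these should match the expert predictions reported further below.
# group means implied by the true coefficient means (cmatrix @ mu)
implied_means = tf.linalg.matvec(cmatrix, true_values["mu"])
print(np.round(implied_means, 2))   # expected: [0.12 0.1  0.09 0.27 0.23 0.2 ]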
Define the data generating model¶
# -.-|m { input: true, output_error: false, input_fold: show }
# generative model
class GenerativeModel(tf.Module):
    def __call__(self,
                 parameters,   # obligatory: samples from prior distributions; tf.Tensor
                 dmatrix,      # optional: design matrix; tf.Tensor
                 cmatrix,      # optional: contrast matrix; tf.Tensor
                 **kwargs      # obligatory: possibility for further keyword arguments is needed
                 ):
        # transformed sigma parameter
        sigma_avg = tf.math.reduce_mean(tf.exp(parameters[:,:,-1]), axis = 1)[:,None,None]
        # compute linear predictor term
        epred = parameters[:,:,0:6] @ tf.transpose(dmatrix)
        # define likelihood
        likelihood = tfd.Normal(
            loc = epred,
            scale = tf.broadcast_to(sigma_avg, shape = epred.shape)
        )
        # sample prior predictive data
        ypred = likelihood.sample()
        # compute custom target quantity (here: group-differences)
        samples_grouped = group_obs(ypred, dmatrix, cmatrix)
        # compute mean difference between groups
        effect_list = []
        diffs = [(3,0), (4,1), (5,2)]
        for i in range(len(diffs)):
            # compute group difference
            diff = tf.math.subtract(
                samples_grouped[:, :, :, diffs[i][0]],
                samples_grouped[:, :, :, diffs[i][1]]
            )
            # average over individual obs within each group
            diff_mean = tf.reduce_mean(diff, axis=2)
            # collect all mean group differences
            effect_list.append(diff_mean)
        mean_effects = tf.stack(effect_list, axis=-1)
        return dict(likelihood = likelihood,       # obligatory: likelihood; callable
                    ypred = ypred,                 # obligatory: prior predictive data
                    epred = epred,                 # obligatory: samples from linear predictor
                    mean_effects = mean_effects,   # optional: custom target quantity
                    )
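Before training, a quick shape check of the generative model can be useful. The sketch below feeds in dummy prior samples whose last column holds sigma on the log scale (since the model applies tf.exp to it); the expected output shapes are assumptions based on the code above and on group_obs arranging the groups along the last axis.
# hedged shape check for GenerativeModel with dummy prior samples
B, M = 4, 10
dummy_betas = tfd.Normal(0., 0.1).sample((B, M, 6))       # 6 regression coefficients
dummy_log_sigma = tfd.Normal(-2., 0.1).sample((B, M, 1))  # log of the sd parameter
dummy_params = tf.concat([dummy_betas, dummy_log_sigma], axis=-1)
out = GenerativeModel()(dummy_params, dmatrix=dmatrix, cmatrix=cmatrix)
print(out["epred"].shape)         # expected: (4, 10, 360)
print(out["ypred"].shape)         # expected: (4, 10, 360)
print(out["mean_effects"].shape)  # expected: (4, 10, 3)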
Specify the target quantities and the elicitation technique¶
# define a custom function using the output from the generative model
def custom_r2(ypred, epred, **kwargs):
    return tf.math.divide(tf.math.reduce_variance(epred, axis = -1),
                          tf.math.reduce_variance(ypred, axis = -1))
# specify target quantity, elicitation technique and loss combination
t1 = target_config(target="R2",
elicitation="histogram",
combine_loss="all",
custom_target_function = custom_r2)
t2 = target_config(target="group_means",
elicitation="quantiles",
combine_loss="by-group",
quantiles_specs = (10, 20, 30, 40, 50, 60, 70, 80, 90))
t3 = target_config(target="mean_effects",
elicitation="quantiles",
combine_loss="by-group",
quantiles_specs = (10, 20, 30, 40, 50, 60, 70, 80, 90))
target_info = target_input(t1, t2, t3)
pp.pprint(target_info)
{'combine_loss': ['all', 'by-group', 'by-group'],
'custom_target_function': [<function custom_r2 at 0x0000024BB5197D80>,
None,
None],
'elicitation': ['histogram', 'quantiles', 'quantiles'],
'internal_loss': [None, None, None],
'quantiles_specs': [(10, 20, 30, 40, 50, 60, 70, 80, 90),
(10, 20, 30, 40, 50, 60, 70, 80, 90)],
'target': ['R2', 'group_means', 'mean_effects']}
Simulate from the "ideal" expert¶
# -.-|m { input: true, output_error: false, input_fold: show }
expert_res_list, prior_pred_res = expert_model(1, user_config["rep"],
parameters_dict, GenerativeModel, target_info,
method = "ideal_expert_nf",
dmatrix = dmatrix,
cmatrix = cmatrix,
dmatrix_fct = dmatrix)
# elicited statistics from the (ideal) expert
expert_res_list.keys()
dict_keys(['R2_hist_0', 'group_means_quant_1', 'mean_effects_quant_2'])
Expert predictions¶
# group means
print(group_stats(prior_pred_res["ypred"], num_groups = 6))
# mean effects
print("mean effects:")
print(np.round(tf.reduce_mean(prior_pred_res["mean_effects"],(0,1)),2))
+-------+------+------+
| group | mean | std  |
+-------+------+------+
| gr_0  | 0.12 | 0.02 |
| gr_1  | 0.1  | 0.06 |
| gr_2  | 0.09 | 0.07 |
| gr_3  | 0.27 | 0.03 |
| gr_4  | 0.22 | 0.07 |
| gr_5  | 0.2  | 0.08 |
+-------+------+------+
mean effects:
[0.15 0.13 0.11]
# plot expert predictions
plot_expert_preds(expert_res_list)
Learn the prior distributions¶
res_dict = trainer(expert_res_list, user_config["B"], user_config["rep"],
parameters_dict, user_config["method"], GenerativeModel,
target_info, user_config, loss_balancing = True, save_vals = ["prior_preds"],
dmatrix = dmatrix, cmatrix = cmatrix, dmatrix_fct = dmatrix)
epoch_time: 1117.474ms Epoch: 0, loss: 0.76864, lr: 0.001000
epoch_time: 304.150ms Epoch: 30, loss: 0.02586, lr: 0.000531
epoch_time: 268.193ms Epoch: 60, loss: 0.01175, lr: 0.000282
epoch_time: 283.451ms Epoch: 90, loss: 0.00847, lr: 0.000150
epoch_time: 289.710ms Epoch: 120, loss: 0.00889, lr: 0.000080
epoch_time: 275.654ms Epoch: 150, loss: 0.00839, lr: 0.000042
epoch_time: 268.742ms Epoch: 180, loss: 0.00846, lr: 0.000023
epoch_time: 277.343ms Epoch: 210, loss: 0.00893, lr: 0.000012
epoch_time: 266.057ms Epoch: 240, loss: 0.00814, lr: 0.000006
epoch_time: 258.379ms Epoch: 270, loss: 0.00922, lr: 0.000003
Results¶
Loss function¶
import matplotlib.pyplot as plt
import numpy as np
plt.plot(np.arange(len(res_dict["loss_info"])), res_dict["loss_info"])
plt.xlabel("epochs")
plt.ylabel("energy loss (weighted sum)")
plt.show()
Summary of model parameters¶
sig = tf.expand_dims(tf.exp(res_dict["priors_info"][0]["priors"][:,:,-1]),-1)
prior_samples = tf.concat([res_dict["priors_info"][0]["priors"][:,:,0:6], sig],-1)
print_restab(method = "normalizing_flow", res = prior_samples,
precision = 2, true_mu = true_values["mu"], true_sigma = true_values["sigma"],
true_nu = true_values["nu"], names = [f"beta{i}" for i in range(6)]+["sigma"])
| var_name | pred mean | pred std | true mean | err mean | true std | err std |
|---|---|---|---|---|---|---|
| beta0 | 0.12 | 0.02 | 0.12 | 0.0 | 0.02 | 0.0 |
| beta1 | 0.15 | 0.02 | 0.15 | 0.0 | 0.02 | 0.0 |
| beta2 | -0.03 | 0.07 | -0.02 | 0.01 | 0.06 | 0.01 |
| beta3 | -0.03 | 0.08 | -0.03 | 0.0 | 0.06 | 0.02 |
| beta4 | -0.02 | 0.04 | -0.02 | 0.0 | 0.03 | 0.01 |
| beta5 | -0.04 | 0.04 | -0.04 | 0.0 | 0.03 | 0.01 |
| sigma | 0.12 | 0.05 | 0.11 | 0.01 | 0.11 | 0.06 |
Learned prior distributions¶
plot_priors_flow(prior_samples, true_values["mu"], true_values["sigma"], true_values["nu"])
Method: Hyperparameter Learning¶
Method specification¶
- PriorSamples($\lambda = (\mu_k, \sigma_k, \nu)$): \begin{align*} \{\beta_k\}_s &\sim \text{Normal}(\mu_k, \exp\{\sigma_k\})\\ \{s\}_s &\sim \text{Exponential}(\nu) \end{align*}
- Generator($\beta_k, s$): \begin{align*} \{\theta_i\}_s &= \{\beta_k\}_s \times X_i\\ \bar{s} &= \frac{1}{M} \sum_{s=1}^M \{s\}_s\\ \{y_i\}_s &\sim \text{Normal}(\{\theta_i\}_s, \bar{s})\\ \end{align*}
- Targets($\{y_i\}_s$): \begin{align*} \{\bar{y}_{g}\}_s &= \frac{1}{N_{g}} \sum_{i=1}^{N_{g}} \{y_i\}_s \quad \text{ for } g = 1,\ldots,6\\ \{\Delta_{eff}\}_s &= \{\bar{y}_{g_1}\}_s - \{\bar{y}_{g_2}\}_s \quad \text{ for } g_1 = 4,5,6 \text{ and } g_2 = 1,2,3\\ \{R^2\}_s &= \frac{Var(\{\theta_i\}_s)}{Var(\{y_i\}_s)} \end{align*}
- Elicits($\{\bar{y}_{g}\}_s, \{\Delta_{eff}\}_s, \{R^2\}_s$): \begin{align*} \text{Quantile-based:} \quad &Q_p^{g} = Q_{p}(\{\bar{y}_{g}\}_s) \quad \text{ for } g = 1,\ldots, 6;\ p = 0.1, \ldots, 0.9\\ \text{Quantile-based:} \quad &Q_p^{eff} = Q_{p}(\{\Delta_{eff}\}_s) \quad \text{ for } eff = 1,2,3;\ p = 0.1, \ldots, 0.9\\ \text{Histogram-based:} \quad &\{R^2\}_s = \{R^2\}_s \end{align*}
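In contrast to the normalizing-flow method, $\lambda$ now collects the hyperparameters directly. A minimal sketch of the PriorSamples step, assuming (as in the specification above) that the scale of the Normal priors is optimized on the log scale and, analogously, that the Exponential rate is kept positive via an exp transform (mirroring the idea behind Normal_unconstrained / Exponential_unconstrained, whose exact implementation may differ):
# sketch of PriorSamples under direct hyperparameter learning (illustrative only)
mu_k = tf.Variable(tf.zeros(6))       # means of the beta_k priors
sigma_k = tf.Variable(tf.zeros(6))    # log-scales of the beta_k priors
nu = tf.Variable(0.)                  # log-rate of the Exponential prior on s
M = 200
beta_samples = tfd.Normal(mu_k, tf.exp(sigma_k)).sample(M)   # shape (M, 6)
s_samples = tfd.Exponential(tf.exp(nu)).sample(M)            # shape (M,)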
User specification¶
Setting hyperparameter for the learning algorithm¶
# -.-|m { input: true, output_error: false, input_fold: show }
# setting of hyperparameter of learning algorithm
user_config = dict(
B = 2**8,
rep = 200,
epochs = 300,
view_ep = 30,
lr_decay = True,
lr0 = 0.1,
lr_min = 0.0001,
loss_dimensions = "m,n:B",
loss_discrepancy = "energy",
loss_scaling = "unscaled",
method = "hyperparameter_learning"
)
Define the design matrix¶
# construct design matrix
X = pa.dmatrix("a*b", pa.balanced(a = 2, b = 3, repeat = 60), return_type="dataframe")
dmatrix = tf.cast(X, dtype = tf.float32)
# extract contrast matrix from design matrix (for allocating observations to groups)
cmatrix = dmatrix[0:dmatrix.shape[1], :]
print(X)
     Intercept  a[T.a2]  b[T.b2]  b[T.b3]  a[T.a2]:b[T.b2]  a[T.a2]:b[T.b3]
0          1.0      0.0      0.0      0.0              0.0              0.0
1          1.0      0.0      1.0      0.0              0.0              0.0
2          1.0      0.0      0.0      1.0              0.0              0.0
3          1.0      1.0      0.0      0.0              0.0              0.0
4          1.0      1.0      1.0      0.0              1.0              0.0
..         ...      ...      ...      ...              ...              ...
355        1.0      0.0      1.0      0.0              0.0              0.0
356        1.0      0.0      0.0      1.0              0.0              0.0
357        1.0      1.0      0.0      0.0              0.0              0.0
358        1.0      1.0      1.0      0.0              1.0              0.0
359        1.0      1.0      0.0      1.0              0.0              1.0

[360 rows x 6 columns]
Setup the 'ideal agent'¶
Define an (arbitrary) ground truth for the hyperparameter values, which is used to validate the method.
# true hyperparameter values for ideal_expert
true_values = dict()
true_values["mu"] = [0.12, 0.15, -0.02, -0.03, -0.02, -0.04]
true_values["sigma"] = [0.02, 0.02, 0.06, 0.06, 0.03, 0.03]
true_values["nu"] = 9.
# model parameters
parameters_dict = dict()
for i in range(6):
    parameters_dict[f"beta_{i}"] = {
        "family": Normal_unconstrained(),
        "true": tfd.Normal(true_values["mu"][i], true_values["sigma"][i]),
        "initialization": [tfd.Normal(0.,0.1)]*2
    }
exp_dist = Exponential_unconstrained(user_config["rep"])
parameters_dict["sigma"] = {
    "family": exp_dist,
    "true": exp_dist(tf.math.log(true_values["nu"])),
    "initialization": [tfd.Normal(0.,0.1)]
}
print("True hyperparameter values:")
pp.pprint(true_values)
True hyperparameter values:
{'mu': [0.12, 0.15, -0.02, -0.03, -0.02, -0.04],
'nu': 9.0,
'sigma': [0.02, 0.02, 0.06, 0.06, 0.03, 0.03]}
Define the data generating model¶
# -.-|m { input: true, output_error: false, input_fold: show }
# generative model
class GenerativeModel(tf.Module):
    def __call__(self,
                 parameters,   # obligatory: samples from prior distributions; tf.Tensor
                 dmatrix,      # optional: design matrix; tf.Tensor
                 cmatrix,      # optional: contrast matrix; tf.Tensor
                 **kwargs      # obligatory: possibility for further keyword arguments is needed
                 ):
        # compute linear predictor term
        epred = parameters[:,:,0:6] @ tf.transpose(dmatrix)
        # define likelihood
        likelihood = tfd.Normal(
            loc = epred,
            scale = tf.expand_dims(parameters[:,:,-1], -1))
        # sample prior predictive data
        ypred = likelihood.sample()
        # compute custom target quantity (here: group-differences)
        samples_grouped = group_obs(ypred, dmatrix, cmatrix)
        # compute mean difference between groups
        effect_list = []
        diffs = [(3,0), (4,1), (5,2)]
        for i in range(len(diffs)):
            # compute group difference
            diff = tf.math.subtract(
                samples_grouped[:, :, :, diffs[i][0]],
                samples_grouped[:, :, :, diffs[i][1]]
            )
            # average over individual obs within each group
            diff_mean = tf.reduce_mean(diff, axis=2)
            # collect all mean group differences
            effect_list.append(diff_mean)
        mean_effects = tf.stack(effect_list, axis=-1)
        return dict(likelihood = likelihood,      # obligatory: likelihood; callable
                    ypred = ypred,                # obligatory: prior predictive data
                    epred = epred,                # obligatory: samples from linear predictor
                    mean_effects = mean_effects   # optional: custom target quantity
                    )
Specify the target quantities and the elicitation technique¶
# define a custom function using the output from the generative model
def custom_r2(ypred, epred, **kwargs):
    return tf.math.divide(tf.math.reduce_variance(epred, axis = -1),
                          tf.math.reduce_variance(ypred, axis = -1))
# specify target quantity, elicitation technique and loss combination
t1 = target_config(target="R2",
elicitation="histogram",
combine_loss="all",
custom_target_function = custom_r2)
t2 = target_config(target="group_means",
elicitation="quantiles",
combine_loss="by-group",
quantiles_specs = (10, 20, 30, 40, 50, 60, 70, 80, 90))
t3 = target_config(target="mean_effects",
elicitation="quantiles",
combine_loss="by-group",
quantiles_specs = (10, 20, 30, 40, 50, 60, 70, 80, 90))
target_info = target_input(t1, t2, t3)
pp.pprint(target_info)
{'combine_loss': ['all', 'by-group', 'by-group'],
'custom_target_function': [<function custom_r2 at 0x0000024BFD075800>,
None,
None],
'elicitation': ['histogram', 'quantiles', 'quantiles'],
'internal_loss': [None, None, None],
'quantiles_specs': [(10, 20, 30, 40, 50, 60, 70, 80, 90),
(10, 20, 30, 40, 50, 60, 70, 80, 90)],
'target': ['R2', 'group_means', 'mean_effects']}
Simulate from the "ideal" expert¶
# -.-|m { input: true, output_error: false, input_fold: show }
expert_res_list, prior_pred_res = expert_model(1, user_config["rep"],
parameters_dict, GenerativeModel, target_info,
method = "ideal_expert",
dmatrix = dmatrix,
cmatrix = cmatrix,
dmatrix_fct = dmatrix)
# elicited statistics from the (ideal) expert
expert_res_list.keys()
dict_keys(['R2_hist_0', 'group_means_quant_1', 'mean_effects_quant_2'])
Expert predictions¶
# group means
print(group_stats(prior_pred_res["ypred"], num_groups = 6))
# mean effects
print("mean effects:")
print(np.round(tf.reduce_mean(prior_pred_res["mean_effects"],(0,1)),2))
+-------+------+------+
| group | mean | std  |
+-------+------+------+
| gr_0  | 0.12 | 0.02 |
| gr_1  | 0.1  | 0.07 |
| gr_2  | 0.09 | 0.06 |
| gr_3  | 0.27 | 0.03 |
| gr_4  | 0.22 | 0.07 |
| gr_5  | 0.19 | 0.07 |
+-------+------+------+
mean effects:
[0.15 0.12 0.11]
# plot expert predictions
plot_expert_preds(expert_res_list)
Learn the prior distributions¶
res_dict = trainer(expert_res_list, user_config["B"], user_config["rep"],
parameters_dict, user_config["method"], GenerativeModel,
target_info, user_config, loss_balancing = True, save_vals = ["prior_preds"],
dmatrix = dmatrix, cmatrix = cmatrix, dmatrix_fct = dmatrix)
epoch_time: 462.108ms Epoch: 0, loss: 0.74886, lr: 0.100000
epoch_time: 105.835ms Epoch: 30, loss: 0.05988, lr: 0.053144
epoch_time: 104.903ms Epoch: 60, loss: 0.01880, lr: 0.028243
epoch_time: 109.590ms Epoch: 90, loss: 0.02891, lr: 0.015009
epoch_time: 103.867ms Epoch: 120, loss: 0.01400, lr: 0.007977
epoch_time: 103.787ms Epoch: 150, loss: 0.02049, lr: 0.004239
epoch_time: 112.480ms Epoch: 180, loss: 0.01503, lr: 0.002253
epoch_time: 108.481ms Epoch: 210, loss: 0.01145, lr: 0.001197
epoch_time: 108.643ms Epoch: 240, loss: 0.01789, lr: 0.000636
epoch_time: 108.552ms Epoch: 270, loss: 0.01152, lr: 0.000338
Results¶
Loss function¶
import matplotlib.pyplot as plt
import numpy as np
plt.plot(np.arange(len(res_dict["loss_info"])), res_dict["loss_info"])
plt.xlabel("epochs")
plt.ylabel("energy loss (weighted sum)")
plt.show()
Summary of hyperparameter values¶
true_vals = [j for i in zip(true_values["mu"],true_values["sigma"]) for j in i]
true_vals += [true_values["nu"]]
tab, avg_res = print_restab(method = "hyperparameter_learning",
num_vars = 13,
res = res_dict["hyperparam_info"],
start = 10,
precision = 3,
true_values = true_vals)
tab
| var_name | pred mean | pred std | true | err |
|---|---|---|---|---|
| loc_0 | 0.121 | 0.004 | 0.12 | 0.001 |
| scale_0 | 0.023 | 0.0 | 0.02 | 0.003 |
| loc_1 | 0.149 | 0.004 | 0.15 | 0.001 |
| scale_1 | 0.02 | 0.0 | 0.02 | 0.0 |
| loc_2 | -0.024 | 0.005 | -0.02 | 0.004 |
| scale_2 | 0.06 | 0.0 | 0.06 | 0.0 |
| loc_3 | -0.032 | 0.008 | -0.03 | 0.002 |
| scale_3 | 0.06 | 0.0 | 0.06 | 0.0 |
| loc_4 | -0.028 | 0.005 | -0.02 | 0.008 |
| scale_4 | 0.027 | 0.0 | 0.03 | 0.003 |
| loc_5 | -0.043 | 0.006 | -0.04 | 0.003 |
| scale_5 | 0.031 | 0.0 | 0.03 | 0.001 |
| rate_6 | 9.114 | 0.056 | 9.0 | 0.114 |
Learned prior distributions¶
plot_priors_hyp(avg_res, true_values)